# =============================================================================
# =============================================================================
# Create the cohorts to analyze ACS data.
# =============================================================================
# =============================================================================


# =============================================================================
# Import the tcp panel (for zip codes)
# =============================================================================
# Geography setting, read from the shared `ts` mapping (defined outside this section).
geo = ts['geog']

# Zip-level TCP panel; the directory comes from the `cdd` path mapping.
tcp = pd.read_parquet(
    path=cdd['p_d_geo_tcp'] + r'\tcp_25k_zip_q.parquet',
)


# =============================================================================
# Subset the tcp panel to the desired radius around treated zones.
# =============================================================================
# Keep zip codes close to a plasma center opening (present or counterfactual).
# NOTE(review): the earlier comment on this line said "within 7.5km", but the
# cutoffs used are 10000 and 25000 (presumably metres -- confirm), so that
# figure looks stale.
# NOTE(review): the middle clause (treated == 1 and distance < 10000) is already
# implied by the first clause, so it currently has no effect on the selection.
tcps = tcp.loc[
    lambda d: (d['distance'] < 10000)
    | ((d['treated'] == 1) & (d['distance'] < 10000))
    | ((d['ctype'] == 0) & (d['distance_t2cf'] < 25000))
].copy()

# Only keep rows whose `open` date is after 2014-06-30.
tcps = tcps[tcps['open'].gt(dt.datetime(2014, 6, 30))]

# First cut: drop anything ranked beyond 20 on distance_t2cf_rank.
tcps = tcps[tcps['distance_t2cf_rank'].le(20)]

# Keep cohorts where `treated` is not constant (group max differs from group min),
# i.e. cohorts that still contain more than one treatment status.
tcps = tcps.loc[
    lambda d: d.groupby('cohort')['treated'].transform('max').ne(
        d.groupby('cohort')['treated'].transform('min')
    )
]

# Within each cohort keep the rows whose rank is at most the 10th-smallest
# distinct rank (or the largest distinct rank when the cohort has fewer than 10).
tcpss = tcps.loc[
    lambda d: d['distance_t2cf_rank'].le(
        d.groupby(['cohort'])['distance_t2cf_rank'].transform(
            lambda ranks: ranks.drop_duplicates().nsmallest(10).iloc[-1]
        )
    )
].copy()

# Drop cohorts left without any treated row after the rank trimming.
tcpss = tcpss.loc[lambda d: d.groupby('cohort')['treated'].transform('sum').gt(0)]

# Calendar year of each observation, and event time collapsed into blocks of 4
# (presumably quarters -> years; confirm the unit of `etime`).
tcpss = tcpss.assign(
    year=lambda d: d['date'].dt.year,
    etimey=lambda d: np.floor(d['etime'].div(4)),
)
tcpss.info(memory_usage='deep')



# =============================================================================
# Code to create cohorts for analyzing ACS data
# =============================================================================
# Load the ACS ZCTA-level control variables.
zip_controls_aug = pd.read_parquet(
    path=cdd['p_d_acs'] + r'\ZCTA\acs_2014_2022_s3.parquet',
)

# Columns carried over from the TCP cohort panel.
ztcp_vars = [
    'cohort', 'zip', 'year', 'date', 'open', 'distance',
    'intensity_absdelta_tcf', 'open_cf', 'cohort_cf',
    'distance_t2cf', 'ctype', 'etime', 'etimey', 'treated',
    'distance_t2cf_rank',
]

# Demographic columns taken from the ACS controls (matched on zip and year).
zdemo_vars = [
    'zip', 'year', 'population', 'male_p', 'white_p', 'educ_sc_p', 'educ_bach_p',
    'mhi_quin1', 'mhi_quin2', 'mhi_quin3', 'mhi_quin4', 'mhi_quin5',
    'inc_hhmean', 'ownocc_p', 'gini',
    'pvf100_p', 'pvf200_p', 'insured_p', 'empt_nw_p', 'empt_nft_p', 'empt_ft_p',
    'snap_p', 'aid_p', 'pubai_p', 'comm_public_p', 'comm_car_p', 'comm_timemean',
    'state', 'sqkm', 'pop2sqkm',
]

# Density column that is attached for the year before the opening year.
dens_vars = ['zip', 'year', 'pop2sqkm_wq10']

# Take the December rows of the cohort panel, restricted to the TCP columns.
zip_full_demo = tcpss.loc[tcpss['date'].dt.month.eq(12), ztcp_vars]

# Attach same-year demographics; a left join keeps every cohort row.
zip_full_demo = zip_full_demo.merge(
    zip_controls_aug[zdemo_vars],
    how='left',
    on=['zip', 'year'],
)

# Year preceding the opening year, used as the key for the density lookup.
zip_full_demo['openy_pre'] = zip_full_demo['open'].dt.year.sub(1)

# Attach pop2sqkm_wq10 measured in that pre-opening year.
zip_full_demo = zip_full_demo.merge(
    zip_controls_aug[dens_vars].rename(columns={'year': 'openy_pre'}),
    how='left',
    on=['zip', 'openy_pre'],
)

# Persist the analysis table.
zip_full_demo.to_parquet(cdd['p_d_acs'] + r'\Analysis\acs_zip_analysis.parquet')











